#!/usr/bin/env python
# -*- coding:utf-8 -*-

import random
import re
import time

from pyquery import PyQuery

from base.gsxt_base_worker import GsxtBaseWorker
from common import util
from common.global_field import Model


class GsxtLiaoNingWorker(GsxtBaseWorker):
    """Worker that searches the publicity site at ``self.host`` and stores raw detail pages.

    The public entry points are :meth:`get_search_result_html`, which turns a
    keyword into a list of company parameters, and
    :meth:`get_detail_html_list`, which downloads every detail section for
    those companies and writes the collected models to ``self.target_db``.

    NOTE(review): ``self.host``, ``self.log``, ``self.province``,
    ``self.target_db``, ``self.target_table``, ``task_request``, ``get_model``,
    ``append_model`` and the ``SEARCH_*`` status values are not defined in this
    class; they are presumably provided by ``GsxtBaseWorker`` -- confirm there.
    """

    # Matches one annual-report entry in the annual-report listing response
    # and captures (artid, ancheyear). Compiled once instead of on every call.
    _YEAR_INFO_PATTERN = re.compile(
        r'"anchedateStr":".*?","artid":"(.*?)","ancheyear":"(.*?)","nbstate":".*?"')

    def __init__(self, **kwargs):
        GsxtBaseWorker.__init__(self, **kwargs)

    def _build_action_url(self, action):
        """Return the absolute URL of ``action`` below the shared endpoint path."""
        return "http://{host}/saicpub/entPublicitySC/entPublicityDC/{action}".format(
            host=self.host, action=action)

    def _build_detail_url(self, action, pri_pid, ent_type):
        """Return the URL of a per-company ``action`` with its pripid/type query string."""
        return self._build_action_url(action) + "?pripid={pripid}&type={enttype}".format(
            pripid=pri_pid, enttype=ent_type)

    def get_search_result_html(self, keyword, session):
        """Search the site for ``keyword`` and collect parameters of matching companies.

        :param keyword: search text, posted as ``solrCondition``.
        :param session: HTTP session object; its ``post`` method is handed to
            ``self.task_request``.
        :return: ``(param_list, status)``. ``param_list`` holds one dict with
            the keys ``pripid``, ``entname`` and ``enttype`` per usable result.
            ``status`` is ``self.SEARCH_SUCCESS`` when at least one entry was
            collected, ``self.SEARCH_NOTHING_FIND`` when the result array is
            empty or contains no usable entry, and ``self.SEARCH_ERROR`` when
            the request or JSON decoding fails, ``jsonArray`` is missing, or
            any exception is raised (the exception is logged, not propagated).
        """
        param_list = []
        try:
            url = self._build_action_url('lngsSearchFpc!searchSolr.action')
            data = {
                'solrCondition': keyword,
                'authCode': 'finish',
                'currentPage': 1,
                'pageSize': 10,
            }

            # No retry is performed in this method.
            # NOTE(review): an earlier comment here mentioned retrying on a
            # failed verification; presumably task_request handles that -- confirm.
            r = self.task_request(session.post, url, data=data)
            if r is None:
                return param_list, self.SEARCH_ERROR

            json_data = util.json_loads(r.text)
            if json_data is None:
                return param_list, self.SEARCH_ERROR

            json_array = json_data.get('jsonArray')
            if json_array is None:
                return param_list, self.SEARCH_ERROR

            if len(json_array) <= 0:
                return param_list, self.SEARCH_NOTHING_FIND

            for item in json_array:
                if item is None:
                    continue
                pri_pid = item.get('pripid')
                ent_name = item.get('realEntName')
                ent_type = item.get('enttype')
                # An entry is only usable when all three fields are present
                # and non-empty.
                if any(value is None or value == ''
                       for value in (pri_pid, ent_name, ent_type)):
                    continue

                param_list.append({
                    'pripid': pri_pid,
                    'entname': ent_name,
                    'enttype': ent_type,
                })

            if param_list:
                return param_list, self.SEARCH_SUCCESS
            return param_list, self.SEARCH_NOTHING_FIND
        except Exception as e:
            self.log.exception(e)

        # Reached only after an exception; entries collected before the
        # failure are still returned alongside the error status.
        return param_list, self.SEARCH_ERROR

    def get_year_info_list(self, text, ent_type):
        """Yield ``(year, url)`` for every annual-report entry found in ``text``.

        :param text: body of the annual-report listing response.
        :param ent_type: enterprise type, forwarded as the ``entType`` query
            parameter of each detail URL.
        :return: generator of ``(ancheyear, detail_url)`` tuples, in the order
            the entries appear in ``text``.
        """
        # findall() always returns a list (possibly empty), so no None check
        # is needed before iterating.
        for art_id, year in self._YEAR_INFO_PATTERN.findall(text):
            url = self._build_action_url('nbDeatil.action') + \
                '?artId={artId}&entType={entType}'.format(artId=art_id, entType=ent_type)
            yield year, url

    @staticmethod
    def get_copmany_name(text):
        """Extract the company name from the basic-information page.

        The misspelled method name is kept so existing callers keep working.

        :param text: HTML of the basic-information page.
        :return: stripped text of the ``dd`` element(s) inside the second
            ``.info-dl`` element, or ``None`` when ``text`` is empty or ``None``.
        """
        if not text:
            return None
        jq = PyQuery(text, parser='html')
        company = jq.find('.info-dl').eq(1).find('dd').text()
        return company.strip()

    def _store_get_section(self, session, data, model_key, url):
        """GET ``url`` and, if a response came back, append its text to ``data``."""
        response = self.task_request(session.get, url)
        if response is not None:
            self.append_model(data, model_key, url, response.text)

    def _store_detail_sections(self, session, data, pri_pid, ent_type):
        """Download every non-basic detail section of one company into ``data``.

        Sections are requested in a fixed order; a section whose request
        yields ``None`` is skipped without aborting the remaining ones.
        """
        # Liquidation information
        self._store_get_section(
            session, data, Model.liquidation_info,
            self._build_detail_url('getQsxxAction.action', pri_pid, ent_type))

        # Change (amendment) information
        self._store_get_section(
            session, data, Model.change_info,
            self._build_detail_url('getBgxxAction.action', pri_pid, ent_type))

        # Shareholder and capital-contribution information; this is the only
        # section fetched with POST.
        shareholder_info_url = self._build_action_url('getTzrxxPojoList.action')
        shareholder_data = {
            'pripid': pri_pid
        }
        shareholder_info = self.task_request(session.post, shareholder_info_url, data=shareholder_data)
        if shareholder_info is not None:
            self.append_model(data, Model.shareholder_info, shareholder_info_url, shareholder_info.text,
                              post_data=shareholder_data)

        # Capital-contribution information
        self._store_get_section(
            session, data, Model.contributive_info,
            self._build_detail_url('getTzrxxAction.action', pri_pid, ent_type))

        # Key personnel
        self._store_get_section(
            session, data, Model.key_person_info,
            self._build_detail_url('getZyryxxActionAll.action', pri_pid, ent_type))

        # Branch offices
        self._store_get_section(
            session, data, Model.branch_info,
            self._build_detail_url('getFgsxxAction.action', pri_pid, ent_type))

        # Annual reports: the listing response itself is not stored, only the
        # per-year detail pages it points to.
        annual_info_url = self._build_detail_url('getQygsQynbxxAction.action', pri_pid, ent_type)
        annual_listing = self.task_request(session.get, annual_info_url)
        if annual_listing is not None:
            for year, url in self.get_year_info_list(annual_listing.text, ent_type):
                year_page = self.task_request(session.get, url)
                if year_page is None:
                    continue
                self.append_model(data, Model.annual_info, url, year_page.text, year=year,
                                  classify=Model.type_detail)

    def get_detail_html_list(self, seed, session, detail_list):
        """Download the detail pages of every company in ``detail_list`` and store them.

        :param seed: passed through unchanged to ``self.get_model``.
        :param session: HTTP session object; its ``get``/``post`` methods are
            handed to ``self.task_request``.
        :param detail_list: iterable of dicts with at least the keys
            ``pripid`` and ``enttype`` (as produced by
            :meth:`get_search_result_html`).
        :return: number of company models collected. All collected models are
            passed to ``self.target_db.insert_batch_data`` in a single call.

        A company is skipped when its basic-information page cannot be fetched
        or no company name can be parsed from it. Any exception raised while
        processing a company is logged and that company is dropped; processing
        continues with the next one.
        """
        data_list = []
        rank = 0
        for detail_param in detail_list:
            # Random 1-3 second pause before each company.
            time.sleep(random.randint(1, 3))
            try:
                pri_pid = detail_param['pripid']
                ent_type = detail_param['enttype']

                # Basic information
                base_info_url = self._build_detail_url('getJbxxAction.action', pri_pid, ent_type)
                base_info = self.task_request(session.get, base_info_url)
                if base_info is None:
                    continue

                company = self.get_copmany_name(base_info.text)
                if not company:
                    self.log.error('公司名称信息解析错误..{text}'.format(text=base_info.text))
                    continue

                # Build the data model; rank only counts companies whose name
                # was parsed successfully.
                rank += 1
                data = self.get_model(company, rank, seed, self.province)

                # Store the basic-information page first, then the remaining
                # sections.
                self.append_model(data, Model.base_info, base_info_url, base_info.text)
                self._store_detail_sections(session, data, pri_pid, ent_type)

                data_list.append(data)
            except Exception as e:
                self.log.exception(e)

        self.target_db.insert_batch_data(self.target_table, data_list)
        return len(data_list)
